library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RColorBrewer)
library(paletteer)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(here)
## here() starts at /Users/jaynatoye/Documents/GitHub/GitHub/Project/BIS15W2022_group3
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
# Import the bite reports and convert the column names to snake_case.
# With type guessing alone, DispositionIDDesc is read as logical, triggers the
# parsing warning, and ends up 100% missing (presumably because the rows readr
# samples for guessing are blank -- TODO confirm). Reading it as character
# keeps its text values; every other column is still guessed.
animal_bites <- readr::read_csv(
  "data/Animal_Bites.csv",
  col_types = readr::cols(DispositionIDDesc = readr::col_character())
) %>%
  clean_names()
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 12074 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): SpeciesIDDesc, BreedIDDesc, breed, GenderIDDesc, color, AdvIssuedY...
## dbl (3): id, vaccination_yrs, victim_zip
## lgl (1): DispositionIDDesc
## dttm (5): bite_date, vaccination_date, quarantine_date, head_sent_date, rele...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the imported tibble (first rows plus the column names and types).
animal_bites
## # A tibble: 12,074 × 18
## id bite_date species_id_desc breed_id_desc breed gender_id_desc
## <dbl> <dttm> <chr> <chr> <chr> <chr>
## 1 518378 2012-06-30 00:00:00 SKUNK <NA> SKUNK MALE
## 2 518378 2012-06-30 00:00:00 SKUNK <NA> SKUNK MALE
## 3 571527 2014-11-01 00:00:00 DOG <NA> BEAG… FEMALE
## 4 571527 2014-11-01 00:00:00 DOG <NA> BEAG… FEMALE
## 5 577969 2015-03-14 00:00:00 DOG <NA> <NA> <NA>
## 6 577969 2015-03-14 00:00:00 DOG <NA> <NA> <NA>
## 7 581174 2015-04-26 00:00:00 DOG BEAGLE MIX MALE
## 8 581174 2015-04-26 00:00:00 DOG BEAGLE MIX MALE
## 9 585126 2015-06-27 00:00:00 DOG LABRADOR RET… MIX MALE
## 10 585126 2015-06-27 00:00:00 DOG LABRADOR RET… MIX MALE
## # … with 12,064 more rows, and 12 more variables: color <chr>,
## # vaccination_yrs <dbl>, vaccination_date <dttm>, victim_zip <dbl>,
## # adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
#Cleaning up the Data
# Recode the literal "UNKNOWN" placeholder as a true NA in the descriptive
# columns. The date-time columns are not listed: their missing values are
# already NA ("<NA>" is only how a tibble prints them), so asking for the
# string "<NA>" to be replaced there never matched anything.
animal_bites <- animal_bites %>%
  naniar::replace_with_na(
    replace = list(
      gender_id_desc = "UNKNOWN",
      results_id_desc = "UNKNOWN",
      disposition_id_desc = "UNKNOWN"
    )
  )
animal_bites
## # A tibble: 12,074 × 18
## id bite_date species_id_desc breed_id_desc breed gender_id_desc
## <dbl> <dttm> <chr> <chr> <chr> <chr>
## 1 518378 2012-06-30 00:00:00 SKUNK <NA> SKUNK MALE
## 2 518378 2012-06-30 00:00:00 SKUNK <NA> SKUNK MALE
## 3 571527 2014-11-01 00:00:00 DOG <NA> BEAG… FEMALE
## 4 571527 2014-11-01 00:00:00 DOG <NA> BEAG… FEMALE
## 5 577969 2015-03-14 00:00:00 DOG <NA> <NA> <NA>
## 6 577969 2015-03-14 00:00:00 DOG <NA> <NA> <NA>
## 7 581174 2015-04-26 00:00:00 DOG BEAGLE MIX MALE
## 8 581174 2015-04-26 00:00:00 DOG BEAGLE MIX MALE
## 9 585126 2015-06-27 00:00:00 DOG LABRADOR RET… MIX MALE
## 10 585126 2015-06-27 00:00:00 DOG LABRADOR RET… MIX MALE
## # … with 12,064 more rows, and 12 more variables: color <chr>,
## # vaccination_yrs <dbl>, vaccination_date <dttm>, victim_zip <dbl>,
## # adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
# Number and percentage of missing values in each column.
naniar::miss_var_summary(animal_bites)
## # A tibble: 18 × 3
## variable n_miss pct_miss
## <chr> <int> <dbl>
## 1 disposition_id_desc 12074 100
## 2 quarantine_date 12002 99.4
## 3 adv_issued_yn_desc 11814 97.8
## 4 results_id_desc 11134 92.2
## 5 head_sent_date 11084 91.8
## 6 breed 9526 78.9
## 7 vaccination_yrs 8532 70.7
## 8 vaccination_date 7864 65.1
## 9 release_date 7356 60.9
## 10 breed_id_desc 5524 45.8
## 11 gender_id_desc 4566 37.8
## 12 color 3134 26.0
## 13 where_bitten_id_desc 1094 9.06
## 14 bite_date 926 7.67
## 15 victim_zip 336 2.78
## 16 species_id_desc 72 0.596
## 17 id 0 0
## 18 followup_yn_desc 0 0
Most of the rabies test results are missing, as are the dates on which samples were sent to the lab.
# Split bite_date on "-" into separate year, month and day text columns.
animal_bites <- separate(
  animal_bites,
  bite_date,
  into = c("bite_year", "bite_month", "bite_day"),
  sep = "-"
)
animal_bites
## # A tibble: 12,074 × 20
## id bite_year bite_month bite_day species_id_desc breed_id_desc breed
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 518378 2012 06 30 SKUNK <NA> SKUNK
## 2 518378 2012 06 30 SKUNK <NA> SKUNK
## 3 571527 2014 11 01 DOG <NA> BEAGLE …
## 4 571527 2014 11 01 DOG <NA> BEAGLE …
## 5 577969 2015 03 14 DOG <NA> <NA>
## 6 577969 2015 03 14 DOG <NA> <NA>
## 7 581174 2015 04 26 DOG BEAGLE MIX
## 8 581174 2015 04 26 DOG BEAGLE MIX
## 9 585126 2015 06 27 DOG LABRADOR RETRIV MIX
## 10 585126 2015 06 27 DOG LABRADOR RETRIV MIX
## # … with 12,064 more rows, and 13 more variables: gender_id_desc <chr>,
## # color <chr>, vaccination_yrs <dbl>, vaccination_date <dttm>,
## # victim_zip <dbl>, adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
#Questions we wanted to ask: What are the results of the rabies test?
# Tally the reports by rabies test result (missing results included).
count(animal_bites, results_id_desc)
## # A tibble: 3 × 2
## results_id_desc n
## <chr> <int>
## 1 NEGATIVE 916
## 2 POSITIVE 24
## 3 <NA> 11134
# Bar chart of rabies test outcomes, leaving out reports with no recorded
# result. Missing results are real NA values, so they are tested with is.na()
# rather than compared against the string "NA".
animal_bites %>%
  filter(!is.na(results_id_desc)) %>%
  ggplot(aes(results_id_desc, fill = results_id_desc)) +
  geom_bar() +
  labs(title = "Rabies Test Results", x = "Results", y = "Number of Reports") +
  theme_classic()
# Same bar chart as above, but keeping the reports whose result is missing.
ggplot(animal_bites, aes(x = results_id_desc, fill = results_id_desc)) +
  geom_bar() +
  labs(
    title = "Rabies Test Results (with NA)",
    x = "Results",
    y = "Number of Reports"
  ) +
  theme_classic()
What animal has the most reported bites?
# Bite reports per species, most frequent first.
animal_bites %>%
  count(species_id_desc, sort = TRUE)
## # A tibble: 10 × 2
## species_id_desc n
## <chr> <int>
## 1 DOG 9458
## 2 CAT 1760
## 3 BAT 674
## 4 <NA> 72
## 5 RACCOON 54
## 6 OTHER 26
## 7 FERRET 14
## 8 SKUNK 8
## 9 HORSE 4
## 10 RAT 4
# Bar chart of the number of bite reports for each species.
animal_bites %>%
  select(species_id_desc) %>%
  ggplot(aes(x = species_id_desc, fill = species_id_desc)) +
  geom_bar() +
  labs(
    title = "Bite Reports by Animal Species",
    x = "Species",
    y = "Number of Reports"
  )
What animal has the most positive tests for rabies?
# Number of positive rabies tests for each species.
animal_bites %>%
  filter(results_id_desc == "POSITIVE") %>%
  count(species_id_desc)
## # A tibble: 2 × 2
## species_id_desc n
## <chr> <int>
## 1 BAT 22
## 2 DOG 2
# Bar chart of positive rabies tests, one bar per species.
animal_bites %>%
  select(species_id_desc, results_id_desc) %>%
  filter(results_id_desc == "POSITIVE") %>%
  ggplot(aes(x = species_id_desc, fill = species_id_desc)) +
  geom_bar() +
  labs(title = "Positive Cases by Species", x = "Species", y = "Count") +
  theme_classic()
What breed of dog has the most reported bites?
# Show only the reports that involve dogs.
filter(animal_bites, species_id_desc == "DOG")
## # A tibble: 9,458 × 20
## id bite_year bite_month bite_day species_id_desc breed_id_desc breed
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 571527 2014 11 01 DOG <NA> BEAGLE …
## 2 571527 2014 11 01 DOG <NA> BEAGLE …
## 3 577969 2015 03 14 DOG <NA> <NA>
## 4 577969 2015 03 14 DOG <NA> <NA>
## 5 581174 2015 04 26 DOG BEAGLE MIX
## 6 581174 2015 04 26 DOG BEAGLE MIX
## 7 585126 2015 06 27 DOG LABRADOR RETRIV MIX
## 8 585126 2015 06 27 DOG LABRADOR RETRIV MIX
## 9 605326 2016 05 21 DOG COON HOUND <NA>
## 10 605326 2016 05 21 DOG COON HOUND <NA>
## # … with 9,448 more rows, and 13 more variables: gender_id_desc <chr>,
## # color <chr>, vaccination_yrs <dbl>, vaccination_date <dttm>,
## # victim_zip <dbl>, adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
# Ten dog breed IDs with the most bite reports. Rows with no breed ID are
# dropped before ranking, so the result no longer depends on the NA group
# happening to be one of the 11 largest; slice_max() replaces the superseded
# top_n() and returns the rows ordered from most to fewest reports.
top_breed_id <- animal_bites %>%
  filter(species_id_desc == "DOG", !is.na(breed_id_desc)) %>%
  count(breed_id_desc) %>%
  slice_max(n, n = 10)
top_breed_id
## # A tibble: 10 × 2
## breed_id_desc n
## <chr> <int>
## 1 PIT BULL 2210
## 2 GERM SHEPHERD 682
## 3 LABRADOR RETRIV 474
## 4 BOXER 236
## 5 CHICHAUHUA 232
## 6 BEAGLE 214
## 7 ROTTWEILER 172
## 8 AAUST. TERR. 150
## 9 SHIH TZU 134
## 10 GREAT DANE 124
# Column chart of bite counts for the top breed IDs; x labels are tilted so
# the breed names do not overlap.
ggplot(top_breed_id, aes(x = breed_id_desc, y = n, fill = breed_id_desc)) +
  geom_col() +
  labs(title = "Bites by Dog Breed ID", x = "Breed ID", y = "Number of Bites") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Ten most frequent values of the free-text `breed` column among dog reports.
# Missing breeds are removed before ranking instead of taking the top 11 and
# hoping NA is one of them; slice_max() replaces the superseded top_n() and
# returns the rows ordered from most to fewest reports.
top_breed <- animal_bites %>%
  filter(species_id_desc == "DOG", !is.na(breed)) %>%
  count(breed) %>%
  slice_max(n, n = 10)
top_breed
## # A tibble: 10 × 2
## breed n
## <chr> <int>
## 1 MIX 716
## 2 MIXED 110
## 3 JACK RUS 54
## 4 LAB MIX 54
## 5 HOUND MIX 40
## 6 GOLDEN DOODLE 30
## 7 TERRIER 30
## 8 STAFFORDSHIRE 28
## 9 JACK RUSSELL 26
## 10 BOXER MIX 22
# Column chart of bite counts for the top `breed` values; x labels are tilted
# so the names do not overlap.
ggplot(top_breed, aes(x = breed, y = n, fill = breed)) +
  geom_col() +
  labs(title = "Bites by Dog Breed", x = "Breed", y = "Number of Bites") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
Where was the data collected?
# Ten victim zip codes with the most bite reports, most frequent first.
# slice_max() replaces the superseded top_n() and states the ranking column
# explicitly instead of relying on "Selecting by n".
animal_bites %>%
  count(victim_zip) %>%
  slice_max(n, n = 10)
## # A tibble: 10 × 2
## victim_zip n
## <dbl> <int>
## 1 40272 634
## 2 40299 608
## 3 40214 588
## 4 40291 586
## 5 40216 578
## 6 40219 484
## 7 40215 476
## 8 40229 462
## 9 40218 436
## 10 40245 422
# Load the zip-code coordinate table and convert its column names to
# snake_case.
zipcodes <- clean_names(readr::read_csv("data/zipcodes - Sheet1.csv"))
## Rows: 10 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): victim zip, lat, long
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Range and quartiles of each column, used to judge the extent of the map.
summary(zipcodes)
## victim_zip lat long
## Min. :40214 Min. :38.08 Min. :-85.86
## 1st Qu.:40216 1st Qu.:38.13 1st Qu.:-85.79
## Median :40224 Median :38.16 Median :-85.67
## Mean :40242 Mean :38.16 Mean :-85.68
## 3rd Qu.:40265 3rd Qu.:38.19 3rd Qu.:-85.59
## Max. :40299 Max. :38.26 Max. :-85.45
# Bounding box for the base map, taken from the extent of the coordinates in
# `zipcodes` rather than from rounded values copied by hand out of summary().
# `f` is the fraction by which make_bbox() extends the range.
lat <- range(zipcodes$lat)
long <- range(zipcodes$long)
bbox <- make_bbox(long, lat, f = 0.05)
# Download the terrain base map that covers the bounding box.
map1 <- get_map(bbox, maptype = "terrain", source = "stamen")
## Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
# Base map on its own.
ggmap(map1)
# Base map with one blue point per row of `zipcodes`.
ggmap(map1) +
  geom_point(
    data = zipcodes,
    mapping = aes(x = long, y = lat),
    color = "blue",
    size = 3
  ) +
  labs(title = "Location of Bites Reported", x = "Longitude", y = "Latitude")
When was data collected?
# Number of reports per bite year, listed in ascending year order.
arrange(count(animal_bites, bite_year), bite_year)
## # A tibble: 24 × 2
## bite_year n
## <chr> <int>
## 1 1949 2
## 2 1950 2
## 3 1952 2
## 4 1971 2
## 5 1981 2
## 6 1984 2
## 7 1986 2
## 8 2001 6
## 9 2003 2
## 10 2005 2
## # … with 14 more rows
# Store bite_year as a factor so it is handled as a discrete category.
animal_bites <- animal_bites %>%
  mutate(bite_year = as.factor(bite_year))
# Stacked bar chart of reports per bite year, filled by bite month.
# Reports with a missing year and the listed years are excluded.
# NOTE(review): 1952 is excluded while other early years (e.g. 1949, 1950)
# are kept -- confirm that this is intended.
animal_bites %>%
  filter(
    !is.na(bite_year),
    !bite_year %in% c("5013", "2201", "2101", "1952", "2029")
  ) %>%
  ggplot(aes(x = bite_year, fill = bite_month)) +
  geom_bar() +
  labs(
    title = "Dates of Data Collection",
    x = "Bite Year",
    y = "Number of Bites"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 30))
Vaccination rates?
# Tally the reports by the recorded vaccination_yrs value (NA included).
count(animal_bites, vaccination_yrs)
## # A tibble: 5 × 2
## vaccination_yrs n
## <dbl> <int>
## 1 1 2622
## 2 2 2
## 3 3 916
## 4 8 2
## 5 NA 8532
# Reports whose vaccination_yrs value is 3. The column is numeric, so it is
# compared with a number rather than with the string "3", which only matched
# through implicit coercion.
animal_bites %>%
  filter(vaccination_yrs == 3)
## # A tibble: 916 × 20
## id bite_year bite_month bite_day species_id_desc breed_id_desc breed
## <dbl> <fct> <chr> <chr> <chr> <chr> <chr>
## 1 622499 2017 02 05 DOG LABRADOR RETRIV MIX
## 2 622499 2017 02 05 DOG LABRADOR RETRIV MIX
## 3 622840 2017 02 10 DOG LABRADOR RETRIV <NA>
## 4 622840 2017 02 10 DOG LABRADOR RETRIV <NA>
## 5 623128 2017 02 13 DOG PIT BULL <NA>
## 6 623128 2017 02 13 DOG PIT BULL <NA>
## 7 623191 2017 02 18 DOG LABRADOR RETRIV <NA>
## 8 623191 2017 02 18 DOG LABRADOR RETRIV <NA>
## 9 623929 2017 03 01 DOG CHICHAUHUA <NA>
## 10 623929 2017 03 01 DOG CHICHAUHUA <NA>
## # … with 906 more rows, and 13 more variables: gender_id_desc <chr>,
## # color <chr>, vaccination_yrs <dbl>, vaccination_date <dttm>,
## # victim_zip <dbl>, adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
# Show every report with a positive rabies test.
filter(animal_bites, results_id_desc == "POSITIVE")
## # A tibble: 24 × 20
## id bite_year bite_month bite_day species_id_desc breed_id_desc breed
## <dbl> <fct> <chr> <chr> <chr> <chr> <chr>
## 1 627719 2017 04 29 DOG GREAT PYRENEESE <NA>
## 2 627719 2017 04 29 DOG GREAT PYRENEESE <NA>
## 3 679267 <NA> <NA> <NA> BAT <NA> <NA>
## 4 679267 <NA> <NA> <NA> BAT <NA> <NA>
## 5 682696 <NA> <NA> <NA> BAT <NA> <NA>
## 6 682696 <NA> <NA> <NA> BAT <NA> <NA>
## 7 682889 <NA> <NA> <NA> BAT <NA> <NA>
## 8 682889 <NA> <NA> <NA> BAT <NA> <NA>
## 9 683374 <NA> <NA> <NA> BAT <NA> <NA>
## 10 683374 <NA> <NA> <NA> BAT <NA> <NA>
## # … with 14 more rows, and 13 more variables: gender_id_desc <chr>,
## # color <chr>, vaccination_yrs <dbl>, vaccination_date <dttm>,
## # victim_zip <dbl>, adv_issued_yn_desc <chr>, where_bitten_id_desc <chr>,
## # quarantine_date <dttm>, disposition_id_desc <lgl>, head_sent_date <dttm>,
## # release_date <dttm>, results_id_desc <chr>, followup_yn_desc <chr>
# Species, vaccination years and test result for dogs, cats, ferrets and
# horses that have a recorded (positive or negative) rabies test.
animal_bites %>%
  select(species_id_desc, vaccination_yrs, results_id_desc) %>%
  filter(
    species_id_desc %in% c("DOG", "CAT", "FERRET", "HORSE"),
    results_id_desc %in% c("POSITIVE", "NEGATIVE")
  )
## # A tibble: 362 × 3
## species_id_desc vaccination_yrs results_id_desc
## <chr> <dbl> <chr>
## 1 DOG NA NEGATIVE
## 2 DOG NA NEGATIVE
## 3 DOG NA NEGATIVE
## 4 DOG NA NEGATIVE
## 5 DOG NA NEGATIVE
## 6 DOG NA NEGATIVE
## 7 DOG NA NEGATIVE
## 8 DOG NA NEGATIVE
## 9 DOG NA POSITIVE
## 10 DOG NA POSITIVE
## # … with 352 more rows
#Take Away: PLEASE, if you ever go into research and are entering data, be accurate when inputting your data. Also try to record all of your data rather than leaving fields blank. And please get your pets vaccinated.
#Thank you so much!